Linear Regression with NumPy

Imports:


In [1]:
%matplotlib inline
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt

import numpy as np

Compute the RSS (residual sum of squares):


In [2]:
def compute_RSS(H, w, y):
    residuals = y - np.dot(H, w)      # y - Hw
    rss = np.sum(residuals ** 2)      # sum of squared residuals
    return rss
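
As a quick sanity check, compute_RSS should agree with the sum of squared residuals written out directly. A minimal sketch, using toy arrays invented here (not taken from the training data):

H_toy = np.array([[1.0, 2.0],
                  [1.0, 3.0],
                  [1.0, 5.0]])
w_toy = np.array([0.5, 1.0])
y_toy = np.array([2.0, 4.0, 5.0])

residuals = y_toy - np.dot(H_toy, w_toy)  # y - Hw
assert np.isclose(compute_RSS(H_toy, w_toy, y_toy), np.sum(residuals ** 2))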

Compute the norm:


In [3]:
def compute_norma(vector):
    norma = np.sqrt(np.sum(vector ** 2))  # Euclidean (L2) norm
    return norma
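
compute_norma is just the Euclidean (L2) norm, so it should match NumPy's built-in np.linalg.norm. A one-line check with an invented vector:

v = np.array([3.0, 4.0])
assert np.isclose(compute_norma(v), np.linalg.norm(v))  # both give 5.0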

Compute one gradient step:


In [4]:
def step_gradient(H, w_current, y, learning_rate):
    # partial = H^T (y - Hw), i.e. -1/2 of the gradient of the RSS w.r.t. w
    partial = np.dot(np.transpose(H), y - np.dot(H, w_current))

    norma = compute_norma(partial)  # gradient magnitude, used as the stopping criterion

    # Move in the direction of steepest descent of the RSS
    w = w_current + (2 * learning_rate * partial)

    return [w, norma]
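
The update above is w ← w + 2·learning_rate·H^T(y − Hw), a step against the RSS gradient −2·H^T(y − Hw). A quick way to trust the analytic gradient is to compare it with a central finite-difference approximation of compute_RSS; the toy data and step size below are invented for illustration:

H_toy = np.array([[1.0, 2.0], [1.0, 3.0], [1.0, 5.0]])
y_toy = np.array([2.0, 4.0, 5.0])
w_toy = np.array([0.1, 0.2])

analytic = -2 * np.dot(H_toy.T, y_toy - np.dot(H_toy, w_toy))  # gradient of the RSS

eps = 1e-6
numeric = np.zeros_like(w_toy)
for j in range(len(w_toy)):
    e = np.zeros_like(w_toy)
    e[j] = eps
    numeric[j] = (compute_RSS(H_toy, w_toy + e, y_toy)
                  - compute_RSS(H_toy, w_toy - e, y_toy)) / (2 * eps)

assert np.allclose(analytic, numeric, atol=1e-4)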

Run the complete gradient descent:


In [14]:
def gradient_descent(H, y, learning_rate, epsilon):
    w = np.zeros(H.shape[1])  # one weight per column of H (intercept included)
    rss_total = []
    norma_total = []
    norma = epsilon + 1  # anything > epsilon, so the loop runs at least once
    num_iterations = 0

    # Iterate until the gradient norm falls below epsilon
    while norma > epsilon:
        [w, norma] = step_gradient(H, w, y, learning_rate)
        num_iterations += 1
        if num_iterations % 10 == 0:  # record progress every 10 iterations
            rss_total.append(compute_RSS(H, w, y))
            norma_total.append(norma)

    return [w, num_iterations, rss_total, norma_total]
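
A minimal sketch of how gradient_descent can be exercised on synthetic, noise-free data with a known solution; the dataset size, learning rate, and tolerance below are invented for illustration:

rng = np.random.RandomState(0)
X_syn = rng.rand(50, 2)
y_syn = 1.0 + 3.0 * X_syn[:, 0] - 2.0 * X_syn[:, 1]  # true weights: [1, 3, -2]
H_syn = np.c_[np.ones(len(X_syn)), X_syn]  # intercept column

# The learning rate must be small for the scale of H^T H, otherwise the update diverges
w_syn, iters, _, _ = gradient_descent(H_syn, y_syn, learning_rate=0.005, epsilon=1e-6)
print(w_syn)  # should approach [1.0, 3.0, -2.0]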

Running the multiple linear regression:


In [15]:
input_filename = "sample_treino.csv"
learning_rate = 0.00003
epsilon = 0.000001

att = np.genfromtxt(input_filename, delimiter=",", skip_header=1)
H = att[:, 0:-1]  # feature columns (all but the last)
y = att[:, -1]    # target column (the variable to predict)
H_with_ones = np.c_[np.ones(len(H)), H]  # prepend a column of ones for the intercept

[w, num_iterations, rss_total, norma_total] = gradient_descent(H_with_ones, y, learning_rate, epsilon)

print("\n\nNum iterations: {0}\nRSS: {1}\nW: {2}".format(num_iterations, rss_total[-1], w))



Num iterations: 377096
RSS: 36.197707852
W: [ 1.73770989  0.10304146  0.04643678  0.16409834  0.38117851  0.02027818]

Computing the same values with scikit-learn:


In [16]:
reg = LinearRegression()
reg.fit(H, y)  # LinearRegression fits its own intercept, so H is used without the ones column
print("\nCoef with scikit-learn: {0}".format(reg.coef_))
print("Intercept with scikit-learn: {0}\n".format(reg.intercept_))


Coef with scikit-learn: [ 0.10304143  0.0464367   0.16409834  0.38117843  0.02027816]
Intercept with scikit-learn: 1.73771151379
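
As one more cross-check, the closed-form least-squares solution on the same design matrix (ones column included) should reproduce the intercept and coefficients above. A minimal sketch:

w_exact, *_ = np.linalg.lstsq(H_with_ones, y, rcond=None)
print(w_exact)  # intercept followed by the five coefficients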

Plot the training curves:


In [23]:
plt.plot(rss_total, label="RSS")
plt.plot(norma_total, label="Gradient norm")
plt.xlabel("Checkpoint (every 10 iterations)")
plt.ylabel("Value")
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)


Out[23]:
<matplotlib.legend.Legend at 0x7f1f4959df90>
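
The gradient norm falls all the way down to epsilon = 1e-6, so on a linear axis most of both curves is flattened against zero. A log scale on the y-axis makes the late-stage decay easier to read; this is an optional tweak, not part of the original run:

plt.plot(rss_total, label="RSS")
plt.plot(norma_total, label="Gradient norm")
plt.yscale("log")  # log scale reveals the slow late-stage decay
plt.xlabel("Checkpoint (every 10 iterations)")
plt.ylabel("Value (log scale)")
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)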